##load R package: library(mice) ##check variables: summary(nhanes2) dat0<-nhanes2 n<-dim(dat0)[1] r1<-rep(1,n) r1[is.na(dat0$bmi)]<-0 r2<-rep(1,n) r2[is.na(dat0$hyp)]<-0 r3<-rep(1,n) r3[is.na(dat0$chl)]<-0 dat<-cbind(dat,r1,r2,r3) ##analysis after removing missing values: mean(dat$bmi) mean(dat$bmi,na.rm=T) var(dat$bmi) var(dat$bmi,na.rm=T) table(dat$hyp) mean(dat$chl) mean(dat$chl,na.rm=T) var(dat$chl) var(dat$chl,na.rm=T) table(dat$age) ##check missing pattern: library(naniar) vis_miss(dat) gg_miss_upset(dat) gg_miss_var(dat) ##check missing mechanism: library(dplyr) library(ggplot2) library(tidyr) library(scales) ggplot(dat, aes(x= age, group=r1)) + geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") + geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", vjust = -.5) + labs(y = "Percent", fill="age") + facet_grid(~r1) + scale_y_continuous(labels = scales::percent) ggplot(dat, aes(x= age, group=r2)) + geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") + geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", vjust = -.5) + labs(y = "Percent", fill="age") + facet_grid(~r2) + scale_y_continuous(labels = scales::percent) ggplot(dat, aes(x= age, group=r3)) + geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") + geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", vjust = -.5) + labs(y = "Percent", fill="age") + facet_grid(~r3) + scale_y_continuous(labels = scales::percent) fit1<-glm(r1~age,data=dat,family=binomial(link = "logit")) summary(fit1) fit2<-glm(r2~age,data=dat,family=binomial(link = "logit")) summary(fit2) fit3<-glm(r3~age,data=dat,family=binomial(link = "logit")) summary(fit3) ##imputation model check: pairs(dat[,c(2,4)]) par(mfrow=c(2,2)) boxplot(dat$bmi ~ dat$age, col="orange", main="Distribution of bmi", ylab="bmi", xlab="age") boxplot(dat$bmi ~ dat$hyp, col="blue", main="Distribution of bmi", ylab="bmi", xlab="hyp") boxplot(dat$chl ~ dat$age, col="orange", main="Distribution of chl", ylab="chl", xlab="age") boxplot(dat$chl ~ dat$hyp, col="red", main="Distribution of chl", ylab="chl", xlab="hyp") fit1_m<-glm(bmi~age,data=dat) summary(fit1_m) fit2_m<-glm(chl~age+bmi,data=dat) summary(fit2_m) fit3_m<-glm(hyp~age+bmi+chl,data=dat,family=binomial(link = "logit")) summary(fit3_m) ##imputation by using 'mice' package: datm<-mice(dat) ##pooled analysis by using Rubin's rule: fit <- with(data = datm, exp = lm(chl ~ age + bmi + hyp)) summary(pool(fit)) ##Propensity score weighting: #estimate propensity scores for responding: pfit1<-glm(r1~age,data=dat,family=binomial(link = "logit")) ep1<-pfit1$fitted.values ew1<-1/ep1 pfit2<-glm(r2~age,data=dat,family=binomial(link = "logit")) ep2<-pfit2$fitted.values ew2<-1/ep2 pfit3<-glm(r3~age,data=dat,family=binomial(link = "logit")) ep3<-pfit3$fitted.values ew3<-1/ep3 dat2<-cbind(dat,ew1,ew2,ew3) ##weighted analysis: a1<-glm(hyp~age,data=dat2,weights=ew2,family=binomial(link = "logit")) summary(a1)